
do "E:/ReplicateBuild/02_code/00_environment/00_set_environment.do"

*Table of Contents
local tchXwalk = 1
local tchEmployees = 1
	local us_zip_data = 1
	local teach_geocodes = 1
local tchEducation = 1
local tchLicense = 1
local tchEVAAS = 1
	local dataCheck = 1
	

********************************************************************************	
********************************************************************************
* Read in information provided by the district on current teachers
********************************************************************************
********************************************************************************

********************************************************************************
* Read in employee - NCERDC crosswalk
********************************************************************************
if `tchXwalk' == 1 {
	********************************************************************************
	* Create dataset of all unique employees with NCERDC IDs
	* Identifier: FOCALEmployeeID
	********************************************************************************
	use "$rawdata/FOCAL/employees_crosswalk", clear
	
	duplicates drop
	
	ren (EmployeeID matchtype teachid) (FOCALEmployeeID matchtype_FOCAL_emp ncerdc_id)
	
	assert FOCALEmployeeID != .
	assert FOCALEmployeeID !=0
	
	sort FOCALEmployeeID
	
	compress
	
	save "$basedata/FOCAL_employees_crosswalk", replace
	
}

********************************************************************************
* Read in teacher biographical info, including location and FOCAL ID
********************************************************************************
if `tchEmployees' == 1 {
	********************************************************************************
	* (tempfile) Generate lat/long and zipcode crosswalk
	* Identifier: zip5
	********************************************************************************
	if `us_zip_data' == 1 {
		import excel using "$auxdata/us-zip-code-latitude-and-longitude.xlsx", firstrow clear
		ren (Zip Latitude Longitude) (zip5 latitude longitude)
		keep zip5 latitude longitude

		
		sort zip5
		isid zip5
		
		tempfile tempZip
		save `tempZip', replace
	}

		********************************************************************************
		* (tempfile) Generate lat/long and zipcode crosswalk
		* Identifier: zip5
		********************************************************************************
		if `teach_geocodes' == 1{
			import excel "$rawdata/FOCAL/Lawson pub.xlsx", firstrow clear sheet("Lawson Employees")
			ren Zip zip
			

			replace zip = "" if strpos(zip,"CR04")>0
			/*replace zip = "XXX" if zip=="XXX" */ // replacing value with XXX for confidentiality
			gen lenzip = length(zip)
			gen zip9 = zip if lenzip==10 & strpos(zip,"-")>0
			assert strpos(zip,"-")==0 if zip9==""
			
			gen zipasdate = strpos(zip,"/")>0
			assert zipasdate==1 if zip9=="" & zip!=""
			gen zipdate = date(zip,"MDY") - mdy(1,1,1900) + 2 if zipasdate==1 // convert excel to stata

			gen zip5 = substr(zip9,1,5)
			destring zip5, replace
			replace zip5 = zipdate if zip5==. & zipdate!=.
			count if zip5==.
			assert r(N)==1 // one missing zip (Bermuda)
			
			keep zip5
			sort zip5
			duplicates drop

			merge 1:1 zip5 using `tempZip'
			drop if _m==2
			tab _m if zip5!=.
			tab zip5 if _m==1
			drop _m


			ren latitude teacher_lawson_latitude
			ren longitude teacher_lawson_longitude

			sort zip5

			tempfile tempGeo
			save `tempGeo', replace
		}
	********************************************************************************
	* Create dataset of all FOCAL teachers with biographical data.
	* Identifier: FOCALEmployeeID
	********************************************************************************
	import excel "$rawdata/FOCAL/Lawson pub.xlsx", firstrow clear sheet("Lawson Employees")

	ren (EmployeeID City State Zip HiredDate RehiredDate PreviousEmployed CareerStatus StateRate ///
		NationalBoard CertifiedType CertificationEffectiveDate CertExpireDate CertIssueDate JobStatus ///
		PositionCode JobTitle JobCategory PLDesc PersonAlternateID) ///
		(FOCALEmployeeID city state zip hired_date rehired_date previous_employed career_status state_rate ///
		national_board certified_type_str certification_effective_date cert_expire_date cert_issue_date job_status_str ///
		position_code job_title job_category pl_desc PersonAlternateID)
		
	replace position_code = position_code - mdy(1,1,1900) + 2 // convert excel to stata
	format position_code %5.0g

	* clean up some variables and convert strings to binary categories
	foreach var in "cert_issue_date" {
		replace `var' = "" if trim(`var')=="1800-01-01"
		ren `var' `var'_str
		gen `var' = date(`var'_str,"MDY")
		assert `var'_str == "" if `var'==.
		drop `var'_str
		format `var' %d
	}
	
	foreach var in "city" "state" "job_title" {
		replace `var' = trim(upper(`var'))
	}
	
	foreach var in "previous_employed" "national_board" {
		ren `var' `var'_str
		gen `var' = (`var'_str=="Yes") if `var'_str!=""
		drop `var'_str
	}
	
	gen full_time = strpos(job_status_str,"Full Time")>0
	
	foreach var in "certified_type" "job_status" {
		encode `var'_str, gen(`var')
		drop `var'_str
	}

	
	tab job_category // note: Title I is a specific category
	
	* fix and destring zip
	replace state = "BM" if strpos(zip,"CR04")>0
	replace zip = "" if strpos(zip,"CR04")>0
	replace city = "HAMILTON" if strpos(city,"HAMILTON, BERMUDA")>0

	/* replace zip = "XXX" if zip=="XXX" */ // replacing values with XXX to preserve confidentiality
	
	gen lenzip = length(zip)
	gen zip9 = zip if lenzip==10 & strpos(zip,"-")>0
	assert strpos(zip,"-")==0 if zip9==""
	
	gen zipasdate = strpos(zip,"/")>0
	assert zipasdate==1 if zip9=="" & zip!=""
	gen zipdate = date(zip,"MDY") - mdy(1,1,1900) + 2 if zipasdate==1 // convert excel to stata

	gen zip5 = substr(zip9,1,5)
	destring zip5, replace
	replace zip5 = zipdate if zip5==. & zipdate!=.
	count if zip5==.
	assert r(N)==1 // one missing zip (Bermuda)

	drop zip lenzip zipdate zipasdate
	
	sort zip5
	merge n:1 zip5 using `tempGeo'
	assert _m!=2
	drop _m
	
	
	if `dataCheck' ==1 {
		duplicates report FOCALEmployeeID
		duplicates report PersonAlternateID // both unique
	}
	
	* merge on identifiers crosswalk

	sort FOCALEmployeeID
	merge 1:1 FOCALEmployeeID using "$basedata/FOCAL_employees_crosswalk"
	assert _m==3
	drop _m
	
	sort FOCALEmployeeID
	
	compress
	
	save "$basedata/FOCAL_teacher_bio", replace
	
}


********************************************************************************
* Read in teacher's education, license, EVAAS from the district
********************************************************************************

** Education
if `tchEducation' == 1 {
	********************************************************************************
	* Create dataset of teachers' education
	* Identifier: FOCALEmployeeID, educ_event_id
	********************************************************************************
	import excel "$rawdata/FOCAL/Lawson pub.xlsx", firstrow clear sheet("Lawson Education")
	
	ren (EmployeeID DegreeCode Degree InstitutionCode Institution SOURCE) ///
		(FOCALEmployeeID degree_code degree_str institution_code institution source)

	drop degree_code
	encode degree_str, gen(degree)
	drop degree_str
	
	assert source=="Lawson"
	drop source
	
	replace institution = trim(upper(institution))
	replace institution_code = trim(institution_code)
	
	duplicates drop

	local sortvars = "FOCALEmployeeID degree institution_code"
	
	
	if `dataCheck' ==1 {
	* examine duplicates of applicant_id
		duplicates report FOCALEmployeeID // lots of duplicates
		duplicates tag `sortvars', gen(dupvar)
		assert dupvar==0
		drop dupvar

	}

	sort `sortvars'

	by FOCALEmployeeID: gen educ_event_id = _n
	
	compress
	
	save "$basedata/FOCAL_teacher_education", replace
	
}


** License
if `tchLicense' == 1 {
	********************************************************************************
	* Create dataset of teachers' licenses
	* Identifier: FOCALEmployeeID, cert_event_id
	********************************************************************************
	import excel "$rawdata/FOCAL/Lawson pub.xlsx", firstrow clear sheet("Certification")
	
	
	ren (EmployeeID CertificationArea CertificationType CertificationDescription STATUS SchoolYear) ///
		(FOCALEmployeeID certification_area certification_type certification_description status school_year)

	gen active_cert = (status=="Active")
	drop status
	
	assert school_year==20192020 // note: all certification as of 2019-2020
	drop school_year
	
	replace certification_type = trim(certification_type)
	replace certification_description = trim(certification_description)

	
	duplicates drop
	
	local sortvars = "FOCALEmployeeID certification_type certification_area"
	
	if `dataCheck' ==1 {
	* examine duplicates of applicant_id
		duplicates report FOCALEmployeeID // lots of duplicates
		duplicates tag `sortvars', gen(dupvar)
		assert dupvar==0
		drop dupvar

	}

	sort `sortvars'

	by FOCALEmployeeID: gen cert_event_id = _n
	

	sort FOCALEmployeeID cert_event_id

	
	compress
	
	save "$basedata/FOCAL_teacher_license", replace
	
}


** EVAAS
if `tchEVAAS' == 1 {
	local dataCheck = 1
	********************************************************************************
	* Create dataset of teachers' EVAAS scores
	* Identifier: FOCALEmployeeID, evaas_event_id (== group(evaas_school_year evaas_subject evaas_grade evaas_year))
	********************************************************************************
	import excel "$rawdata/FOCAL/Lawson pub.xlsx", firstrow clear sheet("EVAAS")
	
	ren (PersonAlternateID EmployeeID District DistrictNumber School SchoolNumber ///
		Test Subject Grade Year GrowthMeasure StandardError Index Level SchoolYear) ///
		(PersonAlternateID FOCALEmployeeID district district_number evaas_school evaas_school_number ///
		evaas_test evaas_subject evaas_grade evaas_score_year growth_measure standard_error ///
		index evaas_level evaas_school_year)

	drop PersonAlternateID // only appears jointly with FOCALEmployeeID, with a 1-to-1 relationship
	
	drop district district_number
	
	assert evaas_test=="End of Grade"
	drop evaas_test
	
	replace evaas_subject = substr(evaas_subject,1,1)
	
	gen evaas_exceeds = (evaas_level=="Exceeds Expected Growth")
	gen evaas_meets = (evaas_level=="Meets Expected Growth")
	gen evaas_not_meets = (evaas_level=="Does Not Meet Expected Growth")
	drop evaas_level
	
	assert evaas_school_year==20182019 // just one year of data
	
	gen index_test = index-growth_measure/standard_error
	summ index_test, d // big differences are due to rounding
	drop index_test

	
	duplicates drop

	local sortvars = "FOCALEmployeeID evaas_school_year evaas_subject evaas_grade evaas_score_year"
	
	if `dataCheck' ==1 {
	* examine duplicates
		duplicates report FOCALEmployeeID // lots of duplicates
		duplicates tag `sortvars', gen(dupvar)
		assert dupvar==0
		drop dupvar

	}

	sort `sortvars'

	by FOCALEmployeeID: gen evaas_event_id = _n
	
	compress
	
	save "$basedata/FOCAL_teacher_evaas", replace
	
	** Add a second version that is teacher-year specific
	
	gen sy = evaas_score_year
	replace sy = "2019" if inlist(evaas_score_year,"2-Yr-Avg","3-Yr-Avg")
	destring sy, replace

	gen evaas_math_dum = (evaas_subject=="M")
	forv gg=3/8 {
		gen evaas_grade`gg'_dum = (evaas_grade==`gg')
	}

	gen evaas_index_2yr = index if inlist(evaas_score_year,"2-Yr-Avg")
	gen evaas_index_3yr = index if inlist(evaas_score_year,"3-Yr-Avg")

	foreach var in "growth_measure" "standard_error" "index" "evaas_exceeds" "evaas_meets" "evaas_not_meets" "evaas_math_dum" "evaas_grade3_dum" ///
		"evaas_grade4_dum" "evaas_grade5_dum" "evaas_grade6_dum" "evaas_grade7_dum" "evaas_grade8_dum"  {
		replace `var' = . if inlist(evaas_score_year,"2-Yr-Avg","3-Yr-Avg")
	}

	gen num_evaas_measures = (index!=.)

	collapse (sum) num_evaas_measures (mean) growth_measure standard_error index evaas_exceeds evaas_meets evaas_not_meets evaas_math_dum evaas_grade*_dum evaas_index_2yr evaas_index_3yr, by(FOCALEmployeeID sy)

	save "$basedata/FOCAL_teacher_year_evaas", replace
	*******************************************
	****Add EVAAS measures from 2014-2016******
	import excel "$rawdata/FOCAL/EVAAS14_16.xlsx", firstrow clear 
		ren (UniqueID employeeid District DistrictNumber School SchoolNumber ///
		Test Subject Grade Year GrowthMeasure StandardError Index Level ) ///
		(UniqueID FOCALEmployeeID district district_number evaas_school evaas_school_number ///
		evaas_test evaas_subject evaas_grade evaas_score_year growth_measure standard_error ///
		index evaas_level )
		
		drop UniqueID // only appears jointly with FOCALEmployeeID, with a 1-to-1 relationship
	
	drop district district_number
	
	drop if  evaas_test!="End of Grade"
	drop evaas_test
	
	replace evaas_subject = substr(evaas_subject,1,1)
	
	gen evaas_exceeds = (evaas_level=="Exceeds Expected Growth")
	gen evaas_meets = (evaas_level=="Meets Expected Growth")
	gen evaas_not_meets = (evaas_level=="Does Not Meet Expected Growth")
	drop evaas_level
	drop if FOCALEmployeeID=="NULL"
	destring growth_measure FOCALEmployeeID standard_error evaas_grade,replace

		duplicates drop

	local sortvars = "FOCALEmployeeID evaas_score_year evaas_subject evaas_grade "
	
	*if `dataCheck' ==1 {
	* examine duplicates
		duplicates report FOCALEmployeeID // lots of duplicates
		duplicates tag `sortvars', gen(dupvar)
		assert dupvar==0
		drop dupvar

	*}
	local sortvars = "FOCALEmployeeID evaas_score_year evaas_subject evaas_grade "

	sort `sortvars'

	by FOCALEmployeeID: gen evaas_event_id = _n
	
	compress
	
	save "$basedata/temp/FOCAL_teacher_evaas", replace
	
	** Add a second version that is teacher-year specific
	
	gen sy = evaas_score_year
	destring sy, replace

	gen evaas_math_dum = (evaas_subject=="M")
	forv gg=3/8 {
		gen evaas_grade`gg'_dum = (evaas_grade==`gg')
	}



	gen num_evaas_measures = (index!=.)

	collapse (sum) num_evaas_measures (mean) growth_measure standard_error index evaas_exceeds evaas_meets evaas_not_meets evaas_math_dum evaas_grade*_dum , by(FOCALEmployeeID sy)

	append using "$basedata/FOCAL_teacher_year_evaas"
	
	save "$basedata/FOCAL_teacher_year_evaas", replace
}
